package spider.utils.xml;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import spider.data.Analysis;
import spider.data.Attr;
import spider.data.Data;
import spider.data.Elem;
import spider.data.Tag;

/**
 * Description: crawler customization XML file; pages are crawled according to this configuration.
 * @author: gaoll
 * @CreateTime:2014-11-14
 * @ModifyTime:2014-11-14
 */
public class XmlManage {

	private static final Log log = LogFactory.getLog(XmlManage.class);

	/** Location of the crawl configuration, relative to the web application root. */
	private static final String CONFIG_PATH = "/WEB-INF/classes/configure.xml";

	/**
	 * Reads the crawl configuration file and stores its content in the static
	 * fields of {@link Data}.
	 *
	 * <p>The {@code <url>} element fills {@code Data.url_type} and
	 * {@code Data.url_head}; unless the type is {@code "simple"} it also fills
	 * {@code Data.url_start}, {@code Data.url_end} and {@code Data.url_suffix}
	 * (for the simple type those three fields are left untouched). The
	 * {@code <analysis>} element is converted to an {@link Analysis} and stored
	 * in {@code Data.analysis}.
	 *
	 * <p>The {@code Data} fields are assigned as parsing progresses, so when an
	 * exception is thrown part-way the fields assigned earlier keep their new values.
	 *
	 * @param req request used to resolve the configuration path to a file on disk
	 * @throws DocumentException if the path cannot be resolved, the file cannot be
	 *         parsed, or a required element/attribute is missing
	 * @throws NumberFormatException if {@code url_start}, {@code url_end} or an
	 *         attr {@code num} is not a valid number
	 */
	public void xmlManage(HttpServletRequest req) throws DocumentException{
		// NOTE(review): ServletRequest.getRealPath is deprecated in favour of
		// ServletContext.getRealPath; kept because how to obtain the context from
		// the request depends on the servlet API version this project targets.
		String configFile = req.getRealPath(CONFIG_PATH);
		if (configFile == null) {
			// Without this check new File(null) would fail with a bare NullPointerException.
			throw new DocumentException("Cannot resolve " + CONFIG_PATH + " to a file system path");
		}
		if (log.isDebugEnabled()) {
			log.debug("Working directory: " + System.getProperty("user.dir"));
		}

		Document document = new SAXReader().read(new File(configFile));
		Element content = document.getRootElement();

		applyUrlSettings(requiredElement(content, "url"));
		log.info(Data.url_head);

		Data.analysis = parseAnalysis(requiredElement(content, "analysis"));
	}

	/** Copies the settings of the {@code <url>} element into the {@link Data} url fields. */
	private static void applyUrlSettings(Element url) throws DocumentException {
		String urlType = requiredAttribute(url, "type");
		Data.url_type = urlType;
		Data.url_head = requiredElement(url, "url_head").getText();
		if (!"simple".equals(urlType)) {
			// Numeric text is trimmed so whitespace around the number does not break parsing.
			Data.url_start = Long.parseLong(requiredElement(url, "url_start").getText().trim());
			Data.url_end = Long.parseLong(requiredElement(url, "url_end").getText().trim());
			Data.url_suffix = requiredElement(url, "url_suffix").getText();
		}
	}

	/** Builds an {@link Analysis} from the {@code <analysis>} element and its {@code <elem>} children. */
	private static Analysis parseAnalysis(Element analysis) throws DocumentException {
		Analysis result = new Analysis();
		result.setType(requiredAttribute(analysis, "type"));

		List<Elem> elems = new ArrayList<Elem>();
		for (Object node : analysis.elements("elem")) {
			elems.add(parseElem((Element) node));
		}
		result.setElements(elems);
		return result;
	}

	/** Builds an {@link Elem} from one {@code <elem>} element and its {@code <attr>} children. */
	private static Elem parseElem(Element elem) throws DocumentException {
		Elem result = new Elem();
		result.setName(requiredAttribute(elem, "name"));

		List<Attr> attrs = new ArrayList<Attr>();
		for (Object node : elem.elements("attr")) {
			attrs.add(parseAttr((Element) node));
		}
		result.setAttrs(attrs);
		return result;
	}

	/** Builds an {@link Attr} (with its {@link Tag}) from one {@code <attr>} element. */
	private static Attr parseAttr(Element att) throws DocumentException {
		Attr attr = new Attr();
		attr.setType(requiredAttribute(att, "type"));
		attr.setNum(Integer.parseInt(requiredAttribute(att, "num").trim()));

		Tag tag = new Tag();
		tag.setName(requiredElement(att, "name").getText());
		tag.setPro(requiredElement(att, "pro").getText());
		attr.setTag(tag);
		return attr;
	}

	/** Returns the named child element, or fails with a message naming the missing node. */
	private static Element requiredElement(Element parent, String name) throws DocumentException {
		Element child = parent.element(name);
		if (child == null) {
			throw new DocumentException("Missing <" + name + "> inside <" + parent.getName()
					+ "> in " + CONFIG_PATH);
		}
		return child;
	}

	/** Returns the named attribute value, or fails with a message naming the missing attribute. */
	private static String requiredAttribute(Element element, String name) throws DocumentException {
		String value = element.attributeValue(name);
		if (value == null) {
			throw new DocumentException("Missing attribute '" + name + "' on <" + element.getName()
					+ "> in " + CONFIG_PATH);
		}
		return value;
	}
}
